Animation of graphs in R
Using previous age-sex pyramid based on 2021 data, to apply appropriate interactivity and animation methods to design an age-sex pyramid based data visualisation to show the changes of demographic structure of Singapore by age cohort and gender between 2000-2020 at planning area level. The data set used is entitle Singapore Residents by Planning Area / Subzone, Age Group, Sex and Type of Dwelling, June 2000-2010 and Singapore Residents by Planning Area / Subzone, Age Group, Sex and Type of Dwelling, June 2011-2020, from Department of Statistics home page.
packages = c('tidyverse', 'readxl', 'ggthemes')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
pop_data <- read_csv("data/respopagesextod2021.csv")
glimpse (data)
function (..., list = character(), package = NULL, lib.loc = NULL,
verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE)
summary_sex <- pop_data %>%
group_by(AG, Sex) %>%
summarise(Pop = sum(Pop)) %>%
ungroup()
head (summary_sex,5)
# A tibble: 5 x 3
AG Sex Pop
<chr> <chr> <dbl>
1 0_to_4 Females 87730
2 0_to_4 Males 91400
3 10_to_14 Females 97980
4 10_to_14 Males 102330
5 15_to_19 Females 100190
To ensure the age group is sorted into the ideal sequence, we set the order we want.
order <- c("0_to_4", "5_to_9", "10_to_14", "15_to_19", "20_to_24", "25_to_29", "30_to_34", "35_to_39", "40_to_44", "45_to_49", "50_to_54", "55_to_59", "60_to_64", "65_to_69", "70_to_74", "75_to_79", "80_to_84", "85_to_89", "90_and_over")
summary_sex1 <- summary_sex %>%
mutate(AG = factor(AG, levels = order)) %>%
arrange(AG)
head(summary_sex1,5)
# A tibble: 5 x 3
AG Sex Pop
<fct> <chr> <dbl>
1 0_to_4 Females 87730
2 0_to_4 Males 91400
3 5_to_9 Females 97120
4 5_to_9 Males 102390
5 10_to_14 Females 97980
ggplot(summary_sex1, aes(x = AG)) +
geom_bar(data=summary_sex1[summary_sex1$Sex=="Males",], aes(y=Pop*-1), stat="identity", fill="blue") +
geom_bar(data=summary_sex1[summary_sex1$Sex=="Females",], aes(y=Pop), stat="identity", fill="pink") +
geom_hline(yintercept=0, colour="white", lwd=1)+
coord_flip () +
scale_y_continuous(breaks = seq(-160000,160000,40000)) +
labs(y="Population", x="Gender") +
ggtitle(" Male Female")
For animation of population across time, data source can be found here at singstat website
Download 2 sets of data, 1) year 2000 to 2010, and 2) year 2011 to 2020 and combine into 1 set of data.
Check if the header has been copied into the data set by finding out the unique time brackets
year2000 <- read_csv("data/respopagesextod2000to2010.csv")
year2011 <- read_csv("data/respopagesextod2011to2020.csv")
head (year2000,3)
# A tibble: 3 x 7
PA SZ AG Sex TOD Pop Time
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 Ang Mo Kio Cheng San 0_to_4 Males HDB 1- and 2-Room Fla~ 20 2000
2 Ang Mo Kio Cheng San 0_to_4 Males HDB 3-Room Flats 480 2000
3 Ang Mo Kio Cheng San 0_to_4 Males HDB 4-Room Flats 220 2000
head (year2011,3)
# A tibble: 3 x 7
PA SZ AG Sex TOD Pop Time
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 Ang Mo Kio Ang Mo Kio Town Centre 0_to_4 Males HDB 1- a~ 0 2011
2 Ang Mo Kio Ang Mo Kio Town Centre 0_to_4 Males HDB 3-Ro~ 10 2011
3 Ang Mo Kio Ang Mo Kio Town Centre 0_to_4 Males HDB 4-Ro~ 30 2011
# Since columns are the same, we can combine the 2 files into 1 file for processing
combined <- rbind(year2000,year2011)
unique(combined$Time)
[1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
[14] 2013 2014 2015 2016 2017 2018 2019 2020
# write_csv(combined, "combined.csv")
# in the Time column, there are only numbers, hence the row header was not copied into the data
To ensure the age group is sorted into the ideal sequence, we set the order we want.
order <- c("0_to_4", "5_to_9", "10_to_14", "15_to_19", "20_to_24", "25_to_29", "30_to_34", "35_to_39", "40_to_44", "45_to_49", "50_to_54", "55_to_59", "60_to_64", "65_to_69", "70_to_74", "75_to_79", "80_to_84", "85_to_89", "90_and_over")
combined <- combined %>%
mutate(AG = factor(AG, levels = order)) %>%
arrange(AG)
head(combined,5)
# A tibble: 5 x 7
PA SZ AG Sex TOD Pop Time
<chr> <chr> <fct> <chr> <chr> <dbl> <dbl>
1 Ang Mo Kio Cheng San 0_to_4 Males HDB 1- and 2-Room Fla~ 20 2000
2 Ang Mo Kio Cheng San 0_to_4 Males HDB 3-Room Flats 480 2000
3 Ang Mo Kio Cheng San 0_to_4 Males HDB 4-Room Flats 220 2000
4 Ang Mo Kio Cheng San 0_to_4 Males HDB 5-Room and Execut~ 80 2000
5 Ang Mo Kio Cheng San 0_to_4 Males HUDC Flats (excluding~ 0 2000
To plot the graph over the different years, we need to call out the Time field as a column (variable)
summary_sex_20 <- combined4 %>%
group_by(AG, Sex, Time, PA) %>%
summarise(Pop = sum(Pop)) %>%
ungroup()
head (summary_sex_20,5)
# A tibble: 5 x 5
AG Sex Time PA Pop
<fct> <chr> <dbl> <chr> <dbl>
1 0_to_4 Females 2000 Ang Mo Kio 290
2 0_to_4 Females 2000 Bedok 1620
3 0_to_4 Females 2000 Marine Parade 400
4 0_to_4 Females 2000 Punggol 0
5 0_to_4 Females 2001 Ang Mo Kio 250
Using the individual Age-sex pyramid from above (plotted for year 2021), we re-use the code to plot out 20 pyramid graphs, 1 graph for each year.
ggplot(summary_sex_20, aes(x=PA)) +
geom_bar(data=summary_sex_20[summary_sex_20$Sex=="Males",], aes(y=Pop*-1), stat="identity", fill="blue") +
geom_bar(data=summary_sex_20[summary_sex_20$Sex=="Females",], aes(y=Pop), stat="identity", fill="pink") +
geom_hline(yintercept=0, colour="white", lwd=1)+
coord_flip () +
scale_y_continuous(breaks = seq(-160000,160000,40000), labels = function(v) ifelse(abs(v)>=1000,paste0(abs(v)/1000, "K"), abs(v))) +
labs(title = "Age-Sex Population Pyramid, in 4 key areas 2021",
caption = 'Data Source: Department of Statistics (June 2021)',
y = "Population", x = "Gender") +
theme_bw() +
theme(legend.position = "none")+
theme(plot.title = element_text(size=16))+
theme(plot.subtitle = element_text(size=12))+
facet_wrap(. ~ `Time`,ncol=4)
It is clear from the 20 graphs displayed, that the difference in population year on year is not clear. To show more clarity, we use the year as base to transition the graph in 1 frame in the next section.
but first, we enhance the graph by
adding title caption theme find out the maximum and minimum values of the population to set the chart axis to ensure all the values will be captured properly.
We call out the package required which is ggaminate.
packages = c('gganimate')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
SG20 <- ggplot(summary_sex_20, aes(x=AG,colour=Sex,fill=Sex)) +
geom_bar(data=summary_sex_20[summary_sex_20$Sex=="Males",], aes(y=Pop*-1), stat="identity") +
geom_bar(data=summary_sex_20[summary_sex_20$Sex=="Females",], aes(y=Pop), stat="identity") +
geom_hline(yintercept=0, colour="white", lwd=1) +
coord_flip() +
scale_y_continuous(limits = c(-10000, 10000), n.breaks = 10, labels = function(v) ifelse(abs(v)>= 1000,paste0(abs(v)/1000, "K"), abs(v))) +
labs(title = "Singapore Age-Sex Population Pyramid for 20 years in 4 key areas",
subtitle = 'Year: "{round(frame_time, 0)}"',
caption = 'Data Source: Department of Statistics (June 2000 to June 2020)',
y = 'Male and Female Population',
x = 'Age Group') +
theme_bw () +
theme(legend.text = element_text(size=12))+
theme(plot.title = element_text(size=16))+
theme(plot.subtitle = element_text(size=10)) +
facet_grid(PA ~ .)
SG20
library(gganimate)
SG20_animated <- SG20 +
scale_y_continuous(limits = c(-8000, 8000), n.breaks = 10)+
transition_time(Time) +
ease_aes('linear')
SG20_animated
See below for 2 types of interactive plotting (datatable and 2 interactive graphs). Loading packages for interactive plots
packages = c('tidyverse', 'readxl', 'ggthemes', 'ggiraph', 'plotly',
'gganimate', 'patchwork', 'DT', 'gifski', 'gapminder')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
We create a new data set, and then use the function highlight_key to link both data set to enable the interactive highlight across 2 graphs
data <- summary_sex_20 %>%
group_by(Time, PA) %>%
summarise (Totalpopulation = sum(Pop))
d <-highlight_key(data)
p1 <- ggplot (data = d,
aes(x = Time, y = Totalpopulation, fill = PA)) +
geom_col () +
labs(title = 'Total population that has stayed in the planning area across 20 years')
p2 <- ggplot (data = d,
aes(x = Time, y = PA, fill = Time)) +
geom_col () +
labs(title = 'Total population that has stayed in the planning area across 20 years')
you can use the graphs to investigate when each area themselves has the highest population
subplot (ggplotly (p1),
ggplotly (p2))
or you can see the data table below the chart for data information
gg <- highlight(ggplotly(p1),
"plotly_selected")
crosstalk::bscols(gg,
DT::datatable(d),
widths = 15)
To illustrate the population in the 4 areas across the age group for 20 years (2000 - 2020).
animate1 <- ggplot (summary_sex_20, aes(x = AG, y = Pop/1000))+
geom_col () +
coord_flip() +
ggtitle('Planning area: {closest_state}') +
labs (x = 'AG',
y = 'Population (thousand)') +
transition_states (PA) +
ease_aes('linear') +
enter_fade() +
exit_fade()
animate(animate1,fps=3)